In [4]:
## 사전실행코드
import polars as pl
import plotly.io as pio
pio.renderers.default = "notebook_connected"
import plotly.express as px
# Read the chart CSV (empty strings become null, date-like columns are parsed
# when possible), keep only rows whose snapshot_date is in 2024, and order
# the rows by snapshot_date.
df_spotify = pl.read_csv(
    "./universal_top_spotify_songs.csv",
    null_values = [""],
    try_parse_dates = True,
)
df_spotify = (
    df_spotify
    .filter(pl.col('snapshot_date').dt.year() == 2024)
    .sort('snapshot_date')
)

# Rows with a missing country code are relabelled 'WW'; afterwards every row
# that still has a null in any column is dropped.
# fill_null replaces the former when(is_null() == True)/then/otherwise chain
# and yields the same column under the same name.
df_spotify = (
    df_spotify
    .with_columns(pl.col('country').fill_null(pl.lit('WW')))
    .drop_nulls()
)

# Note-name labels; the label at position i replaces the key value "i".
key_names = ["C", "C#", "D", "Eb", "E", "F", "F#", "G", "G#", "A", "Bb", "B"]
key_levels = pl.Enum(key_names)

# Recode 'key' (cast to string -> label -> Enum) and sort the frame by it.
df_spotify = (
    df_spotify
    .with_columns(
        pl.col('key').cast(pl.String)
        .replace([str(number) for number in range(len(key_names))], key_names)
        .cast(key_levels))
    .sort('key')
)

# 'artists' is split on ', ' into a list; its first entry becomes 'main_vocal'.
artist_list = pl.col('artists').str.split(', ')
# list.tail(-1) is intended to keep everything after the first entry
# (NOTE(review): relies on negative-n semantics of list.tail — confirm).
other_artists = artist_list.list.tail(-1)

df_spotify = df_spotify.with_columns(
    artist_list,
    artist_list.list.get(0, null_on_oob = True).alias('main_vocal'),
    # An empty remainder is stored as null rather than as an empty list.
    pl.when(other_artists.list.len() == 0)
        .then(None)
        .otherwise(other_artists)
        .alias('featuring'),
)

import pycountry_convert as pc

def get_continent_name(nation_code: str) -> str:
    """Return the continent name for a two-letter country code.

    The pseudo-code 'WW' is mapped to "Global" without consulting
    pycountry_convert; every other code is first converted to a continent
    code with ``pc.country_alpha2_to_continent_code``.

    Raises whatever ``pc.country_alpha2_to_continent_code`` raises for an
    unknown code, or ``KeyError`` if the continent code is not in the table.
    """
    continent_names = {
        "NA": "North America", "SA": "South America", "AS": "Asia", "AF": "Africa",
        "OC": "Oceania", "EU": "Europe", "AQ": "Antarctica", "WW": "Global",
    }
    code = 'WW' if nation_code == 'WW' else pc.country_alpha2_to_continent_code(nation_code)
    return continent_names[code]

# Add a 'continent' column by applying get_continent_name to every country code.
df_spotify = df_spotify.with_columns(
    continent = pl.col('country').map_elements(get_continent_name, return_dtype = pl.String)
)

10.2 수치형 변수 간 상관관계 회귀분석하기

In [5]:
# Correlation matrix of the numeric columns. Int64 columns are selected first,
# then Float64 columns, so the matrix follows that column order.
df_spotify.select(pl.col(pl.Int64), pl.col(pl.Float64)).corr()
Out[5]:
shape: (16, 16)
daily_rankdaily_movementweekly_movementpopularityduration_msmodetime_signaturedanceabilityenergyloudnessspeechinessacousticnessinstrumentalnesslivenessvalencetempo
f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64f64
1.0-0.145062-0.264775-0.1128010.0360330.0119650.027519-0.062886-0.042519-0.0443740.0046150.0400690.0134880.030228-0.0570070.018175
-0.1450621.00.368973-0.1561140.0092130.0097210.00442-0.017507-0.00345-0.0187840.0009330.0009710.0133450.004058-0.003661-0.002467
-0.2647750.3689731.0-0.1765580.0095570.0104230.008576-0.029831-0.00634-0.0255190.019179-0.0005840.0083840.012274-0.0077060.000593
-0.112801-0.156114-0.1765581.00.0154870.103172-0.133768-0.0331940.0156870.136663-0.192961-0.111156-0.010133-0.056468-0.0083560.001929
0.0360330.0092130.0095570.0154871.00.0435420.033176-0.196576-0.119117-0.135113-0.1150640.0406640.02811-0.024323-0.200502-0.025502
…………………………………………
0.0400690.000971-0.000584-0.1111560.0406640.034684-0.104765-0.230753-0.527654-0.435017-0.0295841.00.017645-0.013343-0.156258-0.083366
0.0134880.0133450.008384-0.0101330.028110.0363040.013961-0.006804-0.080463-0.220426-0.055980.0176451.00.006919-0.091602-0.006889
0.0302280.0040580.012274-0.056468-0.024323-0.0024850.00932-0.1129740.1415530.0569270.058599-0.0133430.0069191.0-0.0038210.05631
-0.057007-0.003661-0.007706-0.008356-0.200502-0.086380.0126080.4176250.3581560.2833960.043455-0.156258-0.091602-0.0038211.00.031509
0.018175-0.0024670.0005930.001929-0.0255020.022574-0.067961-0.1833120.100580.0482950.075679-0.083366-0.0068890.056310.0315091.0
In [7]:
# Heatmap of the correlation matrix rounded to one decimal place.
numeric_cols = df_spotify.select(pl.col(pl.Int64), pl.col(pl.Float64))
corr_rounded = numeric_cols.corr().with_columns(pl.all().round(1))
fig = px.imshow(
    corr_rounded,
    y = numeric_cols.columns,   # row labels are passed explicitly
    text_auto = True,
    aspect = "auto",
    color_continuous_scale = "RdBu_r",
)
fig.show()
In [8]:
# List variable pairs whose correlation is above 0.5 or below -0.5.
numeric_frame = df_spotify.select(pl.col(pl.Int64), pl.col(pl.Float64))
is_strong = (pl.col('value') < -0.5) | (pl.col('value') > 0.5)
(numeric_frame.corr()
    # Attach the row variable name so the matrix can be reshaped to long form.
    .with_columns(index = pl.lit(pl.Series(numeric_frame.columns)))
    .unpivot(index = 'index')
    # Drop the diagonal (a variable paired with itself), keep strong pairs only.
    .filter(pl.col('index') != pl.col('variable'), is_strong)
    .sort('value', descending = True))
Out[8]:
shape: (4, 3)
indexvariablevalue
strstrf64
"loudness""energy"0.724866
"energy""loudness"0.724866
"acousticness""energy"-0.527654
"energy""acousticness"-0.527654
In [9]:
# Scatter of energy against loudness on a seeded 10% sample, with a red OLS trendline.
loudness_sample = df_spotify.sample(fraction = 0.1, seed = 123)
fig = px.scatter(
    loudness_sample,
    x = 'loudness',
    y = 'energy',
    opacity = 0.1,
    range_y = [0, 1],
    trendline = 'ols',
    trendline_color_override = "red",
)
fig.show()
In [10]:
# Retrieve the trendline fit results attached to `fig` and display the
# summary of the first fitted model.
result = px.get_trendline_results(fig)
result.px_fit_results.iloc[0].summary()
Out[10]:
OLS Regression Results
Dep. Variable: y R-squared: 0.528
Model: OLS Adj. R-squared: 0.528
Method: Least Squares F-statistic: 1.434e+05
Date: Sat, 06 Dec 2025 Prob (F-statistic): 0.00
Time: 19:03:05 Log-Likelihood: 1.0000e+05
No. Observations: 128158 AIC: -2.000e+05
Df Residuals: 128156 BIC: -2.000e+05
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 0.9488 0.001 1138.448 0.000 0.947 0.950
x1 0.0460 0.000 378.679 0.000 0.046 0.046
Omnibus: 408.453 Durbin-Watson: 0.490
Prob(Omnibus): 0.000 Jarque-Bera (JB): 508.583
Skew: -0.058 Prob(JB): 3.65e-111
Kurtosis: 3.286 Cond. No. 18.8


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [11]:
# Scatter of energy against acousticness on a seeded 10% sample, with a red OLS trendline.
acousticness_sample = df_spotify.sample(fraction = 0.1, seed = 123)
fig = px.scatter(
    acousticness_sample,
    x = 'acousticness',
    y = 'energy',
    opacity = 0.1,
    trendline = "ols",
    trendline_color_override = "red",
)
fig.show()
In [12]:
# Retrieve the trendline fit results attached to `fig` and display the
# summary of the first fitted model.
result = px.get_trendline_results(fig)
result.px_fit_results.iloc[0].summary()
Out[12]:
OLS Regression Results
Dep. Variable: y R-squared: 0.274
Model: OLS Adj. R-squared: 0.274
Method: Least Squares F-statistic: 4.839e+04
Date: Sat, 06 Dec 2025 Prob (F-statistic): 0.00
Time: 19:03:12 Log-Likelihood: 72408.
No. Observations: 128158 AIC: -1.448e+05
Df Residuals: 128156 BIC: -1.448e+05
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 0.7473 0.001 1319.663 0.000 0.746 0.748
x1 -0.3441 0.002 -219.967 0.000 -0.347 -0.341
Omnibus: 1380.436 Durbin-Watson: 0.984
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1356.962
Skew: -0.231 Prob(JB): 2.18e-295
Kurtosis: 2.798 Cond. No. 4.38


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

10.3 스포티파이 데이터로 글로벌 인기도 파악하기

In [13]:
# Number of distinct song names per main vocal (evaluated within the filtered frame).
expr_1 = pl.col('name').unique().len().over('main_vocal')


def _top_main_vocals(country: str, prefix: str, n: int = 10) -> pl.DataFrame:
    """Return the `n` main vocals with the most distinct songs in one country chart.

    Parameters
    ----------
    country : value of the 'country' column to keep (e.g. 'WW', 'KR').
    prefix  : prefix for the output columns '<prefix>_Main_Vocal' and '<prefix>_Songs'.
    n       : number of rows to keep after sorting by song count, descending.
    """
    songs_col = f'{prefix}_Songs'
    return (df_spotify.filter(pl.col('country') == country)
        .select(pl.col('main_vocal').alias(f'{prefix}_Main_Vocal'),
            expr_1.alias(songs_col))
        .unique()   # one row per (main vocal, song count)
        .sort(songs_col, descending = True)
        .head(n))


# Side-by-side top-10 tables for the global, Korean, US and UK charts,
# with a leading 'rank' column running from 1 to 10.
df_spotify_EDA1 = (
    pl.concat(
        [_top_main_vocals(country, prefix)
            for country, prefix in (("WW", "Global"), ("KR", "KR"), ("US", "US"), ("GB", "GB"))],
        how = 'horizontal')
    .with_columns(pl.int_range(1, 11).alias('rank'))
    .select(pl.col('rank'), pl.all().exclude('rank')))
df_spotify_EDA1
Out[13]:
shape: (10, 9)
rankGlobal_Main_VocalGlobal_SongsKR_Main_VocalKR_SongsUS_Main_VocalUS_SongsGB_Main_VocalGB_Songs
i64stru32stru32stru32stru32
1"Taylor Swift"36"Jimin"21"Taylor Swift"37"Taylor Swift"36
2"Kendrick Lamar"17"Lim Young Woong"19"Future"35"Oasis"29
3"Beyoncé"17"aespa"16"Beyoncé"23"Kanye West"20
4"Future"16"DAY6"16"Kendrick Lamar"20"Beyoncé"19
5"Sabrina Carpenter"15"NewJeans"15"Kanye West"18"Eminem"19
6"Kanye West"15"Jung Kook"13"Zach Bryan"18"Kendrick Lamar"16
7"Tyler"15"V"12"Ariana Grande"17"Ariana Grande"16
8"Eminem"14"LE SSERAFIM"11"Post Malone"17"Tyler"15
9"Ariana Grande"14"YANGHONGWON"10"Eminem"17"Sabrina Carpenter"15
10"Billie Eilish"11"Taylor Swift"10"Tyler"16"Charli xcx"14
In [14]:
# For each of the four charts, the ten main vocals with the most distinct song names.
top_vocals_by_country = (
    df_spotify
    .filter(pl.col('country').is_in(["WW", "KR", "US", "GB"]))
    .group_by('country', 'main_vocal')
    .agg(pl.col('name').unique().len())   # 'name' now holds the distinct-song count
    .sort(['country', 'name'], descending = True)
    # maintain_order keeps the sorted order, so head(10) takes the largest counts.
    .group_by('country', maintain_order = True)
    .head(10)
)
fig = px.bar(
    top_vocals_by_country,
    x = 'main_vocal', y = 'name', text = 'name',   # bar height and bar label are both the song count
    facet_row = 'country', facet_row_spacing = 0.07,
    labels = {"main_vocal": "메인보컬", "name": "노래수"},
)
# Un-link the x axes between facets and show tick labels on every facet.
fig.update_xaxes(matches = None, showticklabels = True)
fig.show()
In [15]:
# Taylor Swift songs on the global ('WW') chart: number of rows per song,
# shown as 'chart in days', ten largest first.
(df_spotify
    .filter((pl.col('country') == "WW") & (pl.col('main_vocal') == "Taylor Swift"))
    .group_by(['main_vocal', 'name'])
    .agg(pl.len().alias('chart in days'))
    .sort('chart in days', descending = True)
    .head(10))
Out[15]:
shape: (10, 3)
main_vocalnamechart in days
strstru32
"Taylor Swift""Cruel Summer"290
"Taylor Swift""Fortnight (feat. Post Malone)"119
"Taylor Swift""I Can Do It With a Broken Hear…76
"Taylor Swift""Down Bad"32
"Taylor Swift""Who’s Afraid of Little Old Me?"27
"Taylor Swift""Guilty as Sin?"27
"Taylor Swift""So Long, London"24
"Taylor Swift""But Daddy I Love Him"24
"Taylor Swift""My Boy Only Breaks His Favorit…24
"Taylor Swift""Florida!!! (feat. Florence + T…16
In [16]:
# Jimin songs on the Korean ('KR') chart: number of rows per song,
# shown as 'chart in days', ten largest first.
(df_spotify
    .filter((pl.col('country') == "KR") & (pl.col('main_vocal') == "Jimin"))
    .group_by(['main_vocal', 'name'])
    .agg(pl.len().alias('chart in days'))
    .sort('chart in days', descending = True)
    .head(10))
Out[16]:
shape: (10, 3)
main_vocalnamechart in days
strstru32
"Jimin""Closer Than This"353
"Jimin""Like Crazy"352
"Jimin""Set Me Free Pt.2"201
"Jimin""Like Crazy (English Version)"201
"Jimin""Alone"180
"Jimin""Face-off"179
"Jimin""Smeraldo Garden Marching Band …176
"Jimin""Who"155
"Jimin""Slow Dance (feat. Sofia Carson…155
"Jimin""Be Mine"155
In [17]:
def _top_charting_songs(country: str, prefix: str, n: int = 10) -> pl.DataFrame:
    """Return the `n` songs with the most chart rows in one country chart.

    The row count is taken per (song name, main vocal) pair, so two different
    songs that share a title are counted separately instead of each receiving
    the combined total.

    Parameters
    ----------
    country : value of the 'country' column to keep (e.g. 'WW', 'KR').
    prefix  : prefix for the output columns '<prefix>_Song', '<prefix>_Vocal', '<prefix>_Day'.
    n       : number of rows to keep after sorting by the day count, descending.
    """
    day_col = f'{prefix}_Day'
    return (df_spotify.filter(pl.col('country') == country)
        .select(pl.col('name').alias(f'{prefix}_Song'),
            pl.col('main_vocal').alias(f'{prefix}_Vocal'),
            pl.col('name').len().over('name', 'main_vocal').alias(day_col))
        .unique()   # collapse to one row per song
        .sort(day_col, descending = True)
        .head(n))


# Side-by-side top-10 tables for the global, Korean, US and UK charts,
# with a leading 'rank' column running from 1 to 10.
df_spotify_EDA2 = (
    pl.concat(
        [_top_charting_songs(country, prefix)
            for country, prefix in (("WW", "Global"), ("KR", "KR"), ("US", "US"), ("GB", "GB"))],
        how = 'horizontal')
    .with_columns(pl.int_range(1, 11).alias('rank'))
    .select(pl.col('rank'), pl.all().exclude('rank'))
)
df_spotify_EDA2
Out[17]:
shape: (10, 13)
rankGlobal_SongGlobal_VocalGlobal_DayKR_SongKR_VocalKR_DayUS_SongUS_VocalUS_DayGB_SongGB_VocalGB_Day
i64strstru32strstru32strstru32strstru32
1"One Of The Girls (with JENNIE,…"The Weeknd"341"Closer Than This""Jimin"353"Stick Season""Noah Kahan"326"Stick Season""Noah Kahan"327
2"I Wanna Be Yours""Arctic Monkeys"341"3D (feat. Jack Harlow)""Jung Kook"353"Lose Control""Teddy Swims"325"Lose Control""Teddy Swims"320
3"Lose Control""Teddy Swims"340"Seven (feat. Latto) (Explicit …"Jung Kook"353"I Remember Everything (feat. K…"Zach Bryan"322"Beautiful Things""Benson Boone"309
4"Beautiful Things""Benson Boone"328"Standing Next to You""Jung Kook"352"Something in the Orange""Zach Bryan"317"Mr. Brightside""The Killers"300
5"The Night We Met""Lord Huron"324"Like Crazy""Jimin"352"Beautiful Things""Benson Boone"313"Cruel Summer""Taylor Swift"272
6"Cruel Summer""Taylor Swift"290"Love Me Again""V"336"Last Night""Morgan Wallen"272"Scared To Start""Michael Marcagi"266
7"LUNA""Feid"276"Grain of Sand""Lim Young Woong"327"See You Again (feat. Kali Uchi…"Tyler"263"Too Sweet""Hozier"244
8"End of Beginning""Djo"266"Do or Die""Lim Young Woong"322"Good Luck, Babe!""Chappell Roan"247"Unwritten""Natasha Bedingfield"243
9"we can't be friends (wait for …"Ariana Grande"255"London Boy""Lim Young Woong"299"Espresso""Sabrina Carpenter"241"The Night We Met""Lord Huron"232
10"Too Sweet""Hozier"251"Polaroid""Lim Young Woong"282"A Bar Song (Tipsy)""Shaboozey"239"Good Luck, Babe!""Chappell Roan"229
In [18]:
# Display label for every '<region>_<field>' column of df_spotify_EDA2.
column_labels = {
    f"{region}_{field}": label
    for region in ("Global", "KR", "US", "GB")
    for field, label in (("Song", "노래"), ("Vocal", "메인보컬"), ("Day", "차트일수"))
}
(df_spotify_EDA2.style
    .tab_header(title = "2024년 노래 Top 10")
    .tab_stub(rowname_col = 'rank')   # 'rank' is used as the row-name (stub) column
    # One spanner per chart, covering that chart's three columns.
    .tab_spanner("글로벌", ['Global_Song', 'Global_Vocal', 'Global_Day'])
    .tab_spanner("한국", ['KR_Song', 'KR_Vocal', 'KR_Day'])
    .tab_spanner("미국", ['US_Song', 'US_Vocal', 'US_Day'])
    .tab_spanner("영국", ['GB_Song', 'GB_Vocal', 'GB_Day'])
    .cols_align(align = "center")
    .cols_label(**column_labels))
Out[18]:
2024년 노래 Top 10
글로벌 한국 미국 영국
노래 메인보컬 차트일수 노래 메인보컬 차트일수 노래 메인보컬 차트일수 노래 메인보컬 차트일수
1 One Of The Girls (with JENNIE, Lily Rose Depp) The Weeknd 341 Closer Than This Jimin 353 Stick Season Noah Kahan 326 Stick Season Noah Kahan 327
2 I Wanna Be Yours Arctic Monkeys 341 3D (feat. Jack Harlow) Jung Kook 353 Lose Control Teddy Swims 325 Lose Control Teddy Swims 320
3 Lose Control Teddy Swims 340 Seven (feat. Latto) (Explicit Ver.) Jung Kook 353 I Remember Everything (feat. Kacey Musgraves) Zach Bryan 322 Beautiful Things Benson Boone 309
4 Beautiful Things Benson Boone 328 Standing Next to You Jung Kook 352 Something in the Orange Zach Bryan 317 Mr. Brightside The Killers 300
5 The Night We Met Lord Huron 324 Like Crazy Jimin 352 Beautiful Things Benson Boone 313 Cruel Summer Taylor Swift 272
6 Cruel Summer Taylor Swift 290 Love Me Again V 336 Last Night Morgan Wallen 272 Scared To Start Michael Marcagi 266
7 LUNA Feid 276 Grain of Sand Lim Young Woong 327 See You Again (feat. Kali Uchis) Tyler 263 Too Sweet Hozier 244
8 End of Beginning Djo 266 Do or Die Lim Young Woong 322 Good Luck, Babe! Chappell Roan 247 Unwritten Natasha Bedingfield 243
9 we can't be friends (wait for your love) Ariana Grande 255 London Boy Lim Young Woong 299 Espresso Sabrina Carpenter 241 The Night We Met Lord Huron 232
10 Too Sweet Hozier 251 Polaroid Lim Young Woong 282 A Bar Song (Tipsy) Shaboozey 239 Good Luck, Babe! Chappell Roan 229
In [19]:
def _top_number_one_songs(country: str, prefix: str, n: int = 10) -> pl.DataFrame:
    """Return the `n` songs with the most rows at daily_rank 1 in one country chart.

    Rows are grouped by (song name, main vocal), matching the grouping used for
    the per-artist chart-day tables, so same-titled songs by different main
    vocals are not merged.

    Parameters
    ----------
    country : value of the 'country' column to keep (e.g. 'WW', 'KR').
    prefix  : prefix for the output columns '<prefix>_Song', '<prefix>_Main_Vocal',
              '<prefix>_Chart_Days'.
    n       : number of rows to keep after sorting by the day count, descending.
    """
    days_col = f'{prefix}_Chart_Days'
    return (df_spotify.filter(pl.col('country') == country, pl.col('daily_rank') == 1)
        .group_by('name', 'main_vocal')
        .agg(pl.len().alias(days_col))
        .rename({"name": f'{prefix}_Song', "main_vocal": f'{prefix}_Main_Vocal'})
        .sort(days_col, descending = True)
        .head(n))


# Side-by-side tables of #1 songs for the global, Korean, US and UK charts,
# rendered as a styled table with one spanner per chart.
(pl.concat(
        [_top_number_one_songs(country, prefix)
            for country, prefix in (("WW", "Global"), ("KR", "KR"), ("US", "US"), ("GB", "GB"))],
        how = 'horizontal')
    .with_columns(pl.int_range(1, 11).alias('rank'))   # rank column 1..10
    .select(pl.col('rank'), pl.all().exclude('rank')).style   # move 'rank' to the front
    .tab_header(title = "2024년 차트 1위 노래 Top 10")
    .tab_stub(rowname_col = 'rank')
    .tab_spanner("글로벌", ['Global_Song', 'Global_Main_Vocal', 'Global_Chart_Days'])
    .tab_spanner("한국", ['KR_Song', 'KR_Main_Vocal', 'KR_Chart_Days'])
    .tab_spanner("미국", ['US_Song', 'US_Main_Vocal', 'US_Chart_Days'])
    .tab_spanner("영국", ['GB_Song', 'GB_Main_Vocal', 'GB_Chart_Days'])
    .cols_align(align = "center")
    .cols_label(Global_Song = "노래", Global_Main_Vocal = "메인보컬", Global_Chart_Days = "차트일수",
        KR_Song = "노래", KR_Main_Vocal = "메인보컬", KR_Chart_Days = "차트일수",
        US_Song = "노래", US_Main_Vocal = "메인보컬", US_Chart_Days = "차트일수",
        GB_Song = "노래", GB_Main_Vocal = "메인보컬", GB_Chart_Days = "차트일수"))
Out[19]:
2024년 차트 1위 노래 Top 10
글로벌 한국 미국 영국
노래 메인보컬 차트일수 노래 메인보컬 차트일수 노래 메인보컬 차트일수 노래 메인보컬 차트일수
1 Die With A Smile Lady Gaga 95 Like Crazy Jimin 170 Not Like Us Kendrick Lamar 48 Stick Season Noah Kahan 73
2 Beautiful Things Benson Boone 36 Who Jimin 155 Taste Sabrina Carpenter 35 Espresso Sabrina Carpenter 65
3 Espresso Sabrina Carpenter 33 Magnetic ILLIT 11 CARNIVAL Kanye West 34 Taste Sabrina Carpenter 59
4 BIRDS OF A FEATHER Billie Eilish 24 How Sweet NewJeans 9 Please Please Please Sabrina Carpenter 27 Please Please Please Sabrina Carpenter 23
5 La Diabla Xavi 21 Supernova aespa 7 Lovin On Me Jack Harlow 23 Last Christmas Wham! 20
6 Who Jimin 20 Supernatural NewJeans 1 That’s So True Gracie Abrams 18 Too Sweet Hozier 17
7 APT. ROSÉ 19 None None None Too Sweet Hozier 17 That’s So True Gracie Abrams 13
8 Please Please Please Sabrina Carpenter 19 None None None Rockin' Around The Christmas Tree Brenda Lee 16 BACKBONE Chase & Status 9
9 i like the way you kiss me Artemas 13 None None None Die With A Smile Lady Gaga 16 Sailor Song Gigi Perez 8
10 MILLION DOLLAR BABY Tommy Richman 11 None None None Good Luck, Babe! Chappell Roan 13 Beautiful Things Benson Boone 8
In [20]:
# Number of distinct song names that reached daily_rank 1 in each of the four
# charts; the count is stored in the 'name' column, largest first.
(df_spotify
    .filter(pl.col('country').is_in(["WW", "KR", "US", "GB"]) & (pl.col('daily_rank') == 1))
    .group_by('country')
    .agg(pl.col('name').unique().len())
    .sort('name', descending = True))
Out[20]:
shape: (4, 2)
countryname
stru32
"US"35
"GB"22
"WW"21
"KR"6
In [21]:
# Daily rank over time on the Korean chart for the listed song titles.
kr_number_one_songs = ["Like Crazy", "Who", "Magnetic", "How Sweet", "Supernova",
    "Supernatural"]
fig = px.line(
    (df_spotify
        .filter(pl.col('name').is_in(kr_number_one_songs), pl.col('country') == 'KR')
        # px.line joins points in row order; df_spotify is no longer ordered by
        # date at this point, so sort explicitly to get chronological lines.
        .sort('snapshot_date')),
    x = 'snapshot_date', y = 'daily_rank', color = 'name', line_dash = 'name',
    labels = {"snapshot_date": "날짜", "daily_rank": "순위", "name": "노래"})
fig.update_yaxes(autorange = "reversed")   # rank 1 at the top
fig.show()
In [22]:
# For rows where "APT." is at daily_rank 1: number of distinct country codes
# per continent, stored as 'NO.1', largest first.
(df_spotify
    .filter((pl.col('name') == "APT.") & (pl.col('daily_rank') == 1))
    .group_by('continent')
    .agg(pl.col('country').unique().len().alias('NO.1'))
    .sort('NO.1', descending = True))
Out[22]:
shape: (5, 2)
continentNO.1
stru32
"Asia"9
"Oceania"2
"North America"2
"Europe"2
"Global"1
In [23]:
# Snapshot dates on which "APT." is at daily_rank 1 in rows whose continent is "Global".
apt_global_top = (
    (pl.col('name') == "APT.")
    & (pl.col('daily_rank') == 1)
    & (pl.col('continent') == "Global")
)
df_spotify.filter(apt_global_top).select(pl.col('snapshot_date'))
Out[23]:
shape: (19, 1)
snapshot_date
date
2024-10-22
2024-10-23
2024-10-24
2024-10-25
2024-10-26
…
2024-11-05
2024-11-25
2024-11-26
2024-11-27
2024-12-09
In [24]:
# Smallest daily_rank value recorded for "APT." on the Korean chart.
(df_spotify
    .filter((pl.col('country') == "KR") & (pl.col('name') == "APT."))
    .select(pl.col('daily_rank').min()))
Out[24]:
shape: (1, 1)
daily_rank
i64
2
In [25]:
# Daily rank of "APT." over time on the global, Korean, US and UK charts.
fig = px.line(
    (df_spotify
        .filter(pl.col('country').is_in(["WW", "KR", "US", "GB"]), pl.col('name') == "APT.")
        # px.line joins points in row order; df_spotify is no longer ordered by
        # date at this point, so sort explicitly to get chronological lines.
        .sort('snapshot_date')),
    x = 'snapshot_date', y = 'daily_rank', color = 'country', line_dash = 'country')
fig.update_yaxes(autorange = "reversed")   # rank 1 at the top
fig.show()
In [26]:
# Per-country summary for "APT.": mean popularity, mean daily rank and the
# number of chart rows. Rows are aggregated per country first, so the
# pycountry_convert lookups run once per country instead of once per chart row.
df_spotify_EDA4 = (
    df_spotify.filter(pl.col('name') == "APT.")
    .group_by('country')
    .agg(pl.col('continent').first(),
        pl.col('popularity').mean(),
        pl.col('daily_rank').mean(),
        pl.len().alias('chart_days'))
    .with_columns(
        # alpha-2 code -> country name -> alpha-3 code; 'WW' is passed through unchanged.
        pl.col('country').map_elements(
            lambda x: pc.country_name_to_country_alpha3(pc.country_alpha2_to_country_name(x))
                if x != "WW" else "WW", return_dtype = pl.String).alias('nation'),
        # alpha-2 code -> country name; 'WW' is passed through unchanged.
        pl.col('country').map_elements(
            lambda x: pc.country_alpha2_to_country_name(x) if x != "WW" else "WW",
            return_dtype = pl.String).alias('nation_name'))
    # Same column order as before the aggregation was moved up front.
    .select('nation', 'country', 'nation_name', 'continent',
        'popularity', 'daily_rank', 'chart_days'))
df_spotify_EDA4.sort('daily_rank')
Out[26]:
shape: (68, 7)
nationcountrynation_namecontinentpopularitydaily_rankchart_days
strstrstrstrf64f64u32
"SGP""SG""Singapore""Asia"90.3015871.23809563
"HKG""HK""Hong Kong""Asia"90.3015871.30158763
"MYS""MY""Malaysia""Asia"90.3015871.3174663
"TWN""TW""Taiwan, Province of China""Asia"90.3015872.3174663
"ARE""AE""United Arab Emirates""Asia"90.3015872.41269863
…………………
"PRY""PY""Paraguay""South America"86.74285740.74285735
"COL""CO""Colombia""South America"93.48571441.435
"DOM""DO""Dominican Republic""North America"89.746.010
"GTM""GT""Guatemala""North America"94.2547.754
"GRC""GR""Greece""Europe"91.049.02
In [27]:
# Country codes present in df_spotify that have no row in df_spotify_EDA4.
all_countries = df_spotify.select('country').unique()
all_countries.join(df_spotify_EDA4, how = "anti", on = 'country')
Out[27]:
shape: (5, 1)
country
str
"AR"
"EG"
"BY"
"NG"
"UY"
In [28]:
# World map coloured by the per-country mean popularity in df_spotify_EDA4;
# countries are located through the 'nation' column.
popularity_map_options = dict(
    locations = 'nation',
    color = 'popularity',
    scope = "world",
    hover_name = 'nation_name',
    color_continuous_scale = "greens",
    width = 800,
    height = 600,
    title = "로제의 APT. 인기도",
)
fig = px.choropleth(df_spotify_EDA4, **popularity_map_options)
fig.show()
In [29]:
# World map coloured by the per-country mean daily rank in df_spotify_EDA4;
# the colour scale is the reversed "greens" scale.
rank_map_options = dict(
    locations = 'nation',
    color = 'daily_rank',
    scope = "world",
    hover_name = 'nation_name',
    color_continuous_scale = "greens_r",
    width = 800,
    height = 600,
    title = "로제의 APT. 평균 순위",
)
fig = px.choropleth(df_spotify_EDA4, **rank_map_options)
fig.show()